#!/usr/bin/perl
# 
# Exercise 10.8
# 
# Given an amino acid, find the frequency of occurrence of the adjacent amino acids 
# coded in a DNA sequence; or in a GenBank library.
#

# Answer to Exercise 10.8
#
# We'll show how to do this for a DNA sequence, and leave the extension to a GenBank
# library as an exercise (just use the technique of Exercise 10.7).
#
# We will approach the problem by finding the codons for the given amino acid, in
# any reading frame, and then just looking at the two adjacent codons.
# You may want to approach the problem by translating the DNA into amino acids in just the
# one reading frame, and when you find the desired amino acid making the count of the
# adjacent amino acids.  You could then use this as a subroutine to easily extend the
# counts to the reverse complement, or to all six reading frames.
#
# We'll use some of the subroutines from Chapter 8 for this program.

use strict;
use warnings;
use BeginPerlBioinfo;
	
#
# Get sequence from fasta file
#
my $fastafile = 'sample.dna';
my @file_data = get_file_data($fastafile);
my $dna = extract_sequence_from_fasta_data(@file_data);

my $peptide = translate_frame($dna, 1);
print $peptide,"\n";


while(my $aa = getuserinput("Count neighbors of what amino acid?: ")) {
	
	my %countadjacentaa = ();

	while($peptide =~ /(.)$aa(.)/g) {

		# Save the adjacent amino acids as $aa1 and $aa2
		my($aa1, $aa2) = ($1, $2);
	
		# Store $aa1
		if(defined $countadjacentaa{$aa1}) {
	    		$countadjacentaa{$aa1}++;
		}else{
	    		$countadjacentaa{$aa1} = 1;
		}

		# Store $aa2
		if(defined $countadjacentaa{$aa2}) {
			$countadjacentaa{$aa2}++;
		}else{
	    		$countadjacentaa{$aa2} = 1;
		}
	}
	
	print "In this sequence, the neighbors of the amino acid $aa have the following frequency:\n";
	#
	# Sort the keys by the count, and output results
	#
	my @sortedkeys = sort {$countadjacentaa{$b} <=> $countadjacentaa{$a}} keys %countadjacentaa;
	
	foreach my $key (@sortedkeys) {
		print "$key ", $countadjacentaa{$key}, "\n";
	}
}

exit;

################################################################################
# Subroutines
################################################################################
#
# getuserinput
#  - prompt the user for a one-line response
#    return null if response is an empty line, q, or quit
#    Otherwise, return answer (with leading and trailing whitespace removed)
#
sub getuserinput {
	my($prompt) = @_;

	print $prompt;
	my $ans = <STDIN>;
	chomp $ans;
	if($ans =~ /^\s*$/ or $ans =~ /^\s*q\s*$/i or $ans =~ /^\s*quit\s*$/i) {
		return '';
	}else{
		$ans =~ s/^\s*//;
		$ans =~ s/\s*$//;
		return $ans;
	}
}
